Sample summary
Summary of sampled individuals and analysed faecal samples.
#number of samples
length(sample_metadata$sample)
[1] 190
#number of samples by species
sample_metadata %>%
group_by(species) %>%
summarise(n_samples = length(sample)) %>%
tt()
tinytable_skcgnoo0xvp322ouw0sz
| species |
n_samples |
| Sciurus carolinensis |
80 |
| Sciurus vulgaris |
110 |
#number of samples by species and sex
sample_metadata %>%
group_by(species, sex) %>%
summarise(n_samples = length(sample)) %>%
tt()
tinytable_aqxlgtb56wl678667ikk
| species |
sex |
n_samples |
| Sciurus carolinensis |
F |
44 |
| Sciurus carolinensis |
M |
36 |
| Sciurus vulgaris |
F |
63 |
| Sciurus vulgaris |
M |
47 |
#number of samples by species and development
sample_metadata %>%
group_by(species, development) %>%
summarise(n_samples = length(sample)) %>%
tt()
tinytable_yhzj0bs13lujf6dgk3au
| species |
development |
n_samples |
| Sciurus carolinensis |
Adult |
71 |
| Sciurus carolinensis |
Juvenile |
2 |
| Sciurus carolinensis |
Nursing |
3 |
| Sciurus carolinensis |
Pregnant |
4 |
| Sciurus vulgaris |
Adult |
90 |
| Sciurus vulgaris |
Juvenile |
1 |
| Sciurus vulgaris |
Nursing |
8 |
| Sciurus vulgaris |
Pregnant |
11 |
#number of samples by species and type of area
sample_metadata %>%
group_by(species,area_type) %>%
summarise(n_samples = length(sample)) %>%
tt()
tinytable_3fq7x2hxd53z5555fqey
| species |
area_type |
n_samples |
| Sciurus carolinensis |
rural |
29 |
| Sciurus carolinensis |
suburban |
24 |
| Sciurus carolinensis |
urban |
27 |
| Sciurus vulgaris |
rural |
37 |
| Sciurus vulgaris |
suburban |
30 |
| Sciurus vulgaris |
urban |
43 |
#number of distinct squirrels
n_distinct(sample_metadata$animal)
[1] 108
#number of squirrels by species and type of area
sample_metadata %>%
group_by(species,area_type) %>%
summarise(distinct_squirrels = n_distinct(animal)) %>%
tt()
tinytable_krt9bk2qjmezd56yeffi
| species |
area_type |
distinct_squirrels |
| Sciurus carolinensis |
rural |
14 |
| Sciurus carolinensis |
suburban |
13 |
| Sciurus carolinensis |
urban |
18 |
| Sciurus vulgaris |
rural |
21 |
| Sciurus vulgaris |
suburban |
14 |
| Sciurus vulgaris |
urban |
28 |
#number of squirrels by species and season
sample_metadata %>%
group_by(species,season) %>%
summarise(distinct_squirrels = n_distinct(animal)) %>%
tt()
tinytable_uupq62pg132lbfc9mb1i
| species |
season |
distinct_squirrels |
| Sciurus carolinensis |
autumn |
33 |
| Sciurus carolinensis |
spring-summer |
22 |
| Sciurus carolinensis |
winter |
25 |
| Sciurus vulgaris |
autumn |
39 |
| Sciurus vulgaris |
spring-summer |
38 |
| Sciurus vulgaris |
winter |
33 |
#n of analysed faecal samples
ncol(read_counts)
[1] 191
Geographical location of sampled red squirrel (light blue) and grey squirrel (pink) populations in Italy.
#Summarise for generating map
options(dplyr.summarise.inform = FALSE)
sample_metadata_summary <- sample_metadata %>%
#Group by geography and count samples
select(sample, latitude, longitude, country, species) %>%
group_by(latitude, longitude, species) %>%
summarize(count = n()) %>%
ungroup()
italy <- map_data("world", region="italy") %>%
summarise(long = mean(long), lat = mean(lat))
#plotting on map
sample_metadata_summary %>%
ggplot(.) +
#render map
geom_map(
data=map_data("world", region="italy"),
map = map_data("world", region="italy"),
aes(long, lat, map_id=region),
color = "white", fill = "#e6e6e6", linewidth = 0.2
) +
#render points
geom_point(
aes(x=longitude,y=latitude, color=species),
alpha=0.7, shape=16) +
scale_color_manual(values=squirrel_colors) +
#add general plot layout
theme_minimal() +
theme(legend.position = "right",
axis.title.x=element_blank(),
axis.title.y=element_blank()
) + coord_map("mercator")

Sequencing data summary
Total amount of sequencing data generated from the analysed samples.
#amount of discarded data (GB)
sum(round(((sample_metadata$metagenomic_bases+sample_metadata$host_bases)/
(1-sample_metadata$bases_lost_fastp_percent))-
(sample_metadata$metagenomic_bases+sample_metadata$host_bases)))/1000000000
[1] 63.90045
#amount of host data (GB)
sum(sample_metadata$host_bases)/1000000000
[1] 166.0275
#amount of metagenomic data (GB)
sum(sample_metadata$metagenomic_bases)/1000000000
[1] 786.1584
#amount of estimated prokaryotic data (singleM)
sum(sample_metadata$metagenomic_bases * sample_metadata$singlem_fraction)/1000000000
[1] 557.7475
Origin of DNA sequences obtained from each sample.
sequence_fractions <- read_counts %>%
pivot_longer(-genome, names_to = "sample", values_to = "value") %>%
group_by(sample) %>%
summarise(mags = sum(value)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
select(sample,mags,metagenomic_bases,host_bases,bases_lost_fastp_percent) %>%
mutate(mags_bases = mags*146) %>%
mutate(lowqual_bases = ((metagenomic_bases+host_bases)/(1-bases_lost_fastp_percent))-(metagenomic_bases+host_bases)) %>%
mutate(unmapped_bases = metagenomic_bases - mags_bases) %>%
mutate(unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)) %>%
select(sample,mags_bases,unmapped_bases,host_bases,lowqual_bases)
mags_bases_mean <- sequence_fractions %>%
mutate(mags_bases = mags_bases / 1000000000) %>%
select(mags_bases) %>%
pull() %>%
mean()
sequence_fractions %>%
pivot_longer(!sample, names_to = "fraction", values_to = "value") %>%
mutate(value = value / 1000000000) %>%
mutate(fraction = factor(fraction, levels = c("lowqual_bases","host_bases","unmapped_bases","mags_bases"))) %>%
ggplot(., aes(x = sample, y = value, fill=fraction)) +
geom_bar(position="stack", stat = "identity") +
scale_fill_manual(values=c("#CCCCCC","#178a94","#ee8080","#d03161")) +
geom_hline(yintercept = mags_bases_mean, linetype = "dashed", color = "black") +
labs(x = "Samples", y = "Amount of data (GB)") +
theme_classic() +
theme(axis.text.x = element_blank())

Sequencing assessment: difference between mapping rate and estimated singleM proportion
# Estimated vs mapped prokaryotic fraction
sequence_fractions <- read_counts %>%
pivot_longer(-genome, names_to = "sample", values_to = "value") %>%
group_by(sample) %>%
summarise(mags = sum(value)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
select(sample,mags,metagenomic_bases,host_bases,bases_lost_fastp_percent) %>%
mutate(mags_bases = mags*146) %>%
mutate(lowqual_bases = ((metagenomic_bases+host_bases)/(1-bases_lost_fastp_percent))-(metagenomic_bases+host_bases)) %>%
mutate(unmapped_bases = metagenomic_bases - mags_bases) %>%
mutate(unmapped_bases = ifelse(unmapped_bases < 0, 0, unmapped_bases)) %>%
select(sample,mags_bases,unmapped_bases,host_bases,lowqual_bases)
singlem_table <- sequence_fractions %>%
mutate(mags_proportion = round((mags_bases / (mags_bases + unmapped_bases))*100,2)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
mutate(singlem_proportion = round(singlem_fraction*100,2)) %>%
select(sample,mags_proportion,singlem_proportion) %>%
mutate(mags_proportion = ifelse(singlem_proportion == 0, 0, mags_proportion)) %>% #convert zeros to NA
mutate(singlem_proportion = ifelse(singlem_proportion == 0, NA, singlem_proportion)) %>% #convert zeros to NA
mutate(singlem_proportion = ifelse(singlem_proportion < mags_proportion, NA, singlem_proportion)) %>% #if singlem is smaller, then NA, to simplify plot
mutate(singlem_proportion = ifelse(singlem_proportion > 100, 100, singlem_proportion)) #simplify
singlem_table %>%
pivot_longer(!sample, names_to = "proportion", values_to = "value") %>%
mutate(proportion = factor(proportion, levels = c("mags_proportion","singlem_proportion"))) %>%
ggplot(., aes(x = value, y = sample, color=proportion)) +
geom_line(aes(group = sample), color = "#f8a538") +
geom_point() +
scale_color_manual(values=c("#52e1e8","#876b53")) +
theme_classic() +
labs(y = "Samples", x = "Prokaryotic fraction (%)") +
theme(axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 1, size=6), axis.text.y= element_blank(), legend.position = "right")

# Export difference b/w mags and singlem proportions to be used later in script 05-diversity_models
singlem <- sequence_fractions %>%
mutate(mags_proportion = round((mags_bases / (mags_bases + unmapped_bases))*100,2)) %>%
left_join(sample_metadata, by = join_by(sample == sample)) %>%
mutate(singlem_proportion = round(singlem_fraction*100,2)) %>%
mutate(mags_singlem = mags_proportion/singlem_proportion) %>%
mutate(est_mapp = ifelse(mags_singlem >= 1, 1, mags_singlem)) %>%
select(sample,mags_singlem,est_mapp)
write.table(singlem, file = "data/singlem.csv", row.names = FALSE, dec = ".", sep = ";",
quote = FALSE)